from bs4 import BeautifulSoup
import requests
import time
import array

def get_url(arr):
    """Scrape one listing page and append each result link to wood365_14.txt.

    Args:
        arr: dict with keys "page" (listing-page URL to fetch) and
             "count" (that page's number in the buy_N.html sequence).

    Returns:
        A dict {"page": <next page URL>, "count": <count + 1>} pointing at
        the following listing page, or None when the current page contains
        no listing blocks — a falsy result lets the caller's
        ``while next_page:`` loop terminate instead of running forever.
    """
    url = arr['page']
    count = arr['count']
    # Explicit timeout so a stalled server cannot hang the crawl indefinitely.
    ret = requests.get(url, timeout=30).content.decode('utf-8', 'ignore')
    # Name the parser explicitly: a bare BeautifulSoup(ret) emits a
    # GuessedAtParserWarning and may pick a different parser per machine.
    soup = BeautifulSoup(ret, 'html.parser')
    items = soup.find_all('div', class_='pt20 clearfix')
    if not items:
        # Empty page: we walked past the last listing page — signal stop.
        return None

    with open('wood365_14.txt', 'a') as w:
        for item in items:
            anchors = item.find_all('a')
            print(anchors[0]['href'])
            w.write(anchors[0]['href'])
            w.write('\n')
    count += 1
    next_page = 'https://www.wood365.cn/Biz/buy_%d.html' % count
    print(next_page)
    return {"page": next_page, "count": count}


def get_all_url():
    """Crawl listing pages starting at buy_14.html, collecting result links.

    Repeatedly calls get_url(), which appends each page's links to
    wood365_14.txt and returns the descriptor for the next page. Sleeps
    1 s between pages and takes a 10-minute break every 10 pages to stay
    polite toward the server. Stops when get_url() returns a falsy value.
    """
    arr = {"page": 'https://www.wood365.cn/Biz/buy_14.html', "count": 14}
    next_page = get_url(arr)
    pages_fetched = 0
    while next_page:
        next_page = get_url(next_page)
        time.sleep(1)
        print(next_page)
        pages_fetched += 1
        # Long back-off every 10 pages to avoid hammering the server.
        if pages_fetched % 10 == 0:
            time.sleep(600)

def save_detail():
    """Fetch each listing URL stored in wood365_14.txt and save its details.

    For every (relative) URL in wood365_14.txt, downloads the detail page
    from wood365.cn, extracts the listing name and description, and writes
    one "<name>&&&<description>" line to wood365_14_detail.txt. Sleeps 5 s
    between requests. Pages whose expected elements are missing (layout
    change, removed listing) are skipped with a notice instead of crashing.
    """
    with open('wood365_14.txt', 'r') as r, \
            open('wood365_14_detail.txt', 'w') as w:
        # Iterate the file directly instead of a manual readline() loop.
        for i, line in enumerate(r, start=1):
            url = line.strip()
            if not url:
                continue  # skip blank lines left by the collector
            ret = requests.get('https://www.wood365.cn' + url,
                               timeout=30).content.decode('utf-8', 'ignore')
            # Explicit parser avoids GuessedAtParserWarning / machine drift.
            soup = BeautifulSoup(ret, 'html.parser')
            info = soup.find('div', class_='fl shuoming clearfix')
            if info is None:
                # Original code crashed with AttributeError here; skip instead.
                print('no detail block, skipping:', url)
                continue
            name = info.find('h2', class_='fl')
            text = info.find('p', class_='mt35')
            if name is None or text is None:
                print('incomplete detail, skipping:', url)
                continue
            w.write(name.get_text() + '&&&' + text.get_text())
            w.write("\n")
            print(i)
            time.sleep(5)


if __name__ == "__main__":
    # save_detail()
    get_all_url()
    # aa = get_url("https://www.wood365.cn/Biz/buy_14.html",14)
